quarto preview “c:/Users/pietr/OneDrive - City St George’s, University of London/Documents/a2025-2026 BAYES/DABF - Data Analytics for Banking & Finance/DABF_code/Groupwork/Groupwork.ipynb” –to html –no-watch-inputs –execute
Show code
from scipy.stats import jarque_berafrom statsmodels.tsa.stattools import adfullerfrom datetime import datetimefrom statsmodels.api import OLS, add_constantfrom great_tables import GTimport pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport seaborn as snsprint("Current Time:", datetime.now())
Current Time: 2025-11-27 13:19:15.903080
PRICES
Show code
from pathlib import PathPATH = Path("C:/Users/pietr/OneDrive - City St George's, University of London/Documents/a2025-2026 BAYES/DABF - Data Analytics for Banking & Finance/DABF_code/Groupwork/Data-Analytics-For-Banking/DF.xlsx")df = pd.read_excel(PATH, sheet_name="PRICES GBP")# df['Date'] = pd.to_datetime(df['Date'], unit='D')df.set_index('Date', inplace=True)df = df.astype(float)# Inspect the datadf = df.dropna()df.round(4)
AMRC US Equity 0.634550
FSLR US Equity 0.445022
CVX US Equity 0.210595
VWS DC Equity 0.194381
ENI IM Equity 0.193326
SHEL LN Equity 0.119714
NEE US Equity 0.073968
EQNR NO Equity 0.053911
XOM US Equity 0.010326
TTE FP Equity -0.003804
dtype: float64
DAYS = pd.DataFrame({"Name":returns.columns, "Negative":returns.idxmin(axis=0), "Positive":returns.idxmax(axis=0), #"Difference": returns.idxmax(axis=0)-returns.idxmin(axis=0)})( GT(DAYS) .tab_header(title="Date of Largest Daily Move") .opt_stylize(5, color ="gray"))
import ptitprince as ptmelted = returns.melt(var_name="Asset", value_name="Return")plt.figure(figsize=(14, 6))pt.half_violinplot( x="Asset", y="Return", data=melted, palette="tab10", bw=.2, cut=0., scale="area", inner=None, orient="v")sns.boxplot( data=melted, x="Asset", y="Return", width=0.2, palette="tab10", showcaps=True, flierprops =dict(marker='o', markerfacecolor='black', markersize=2, linestyle='none')# showfliers=False)# 1. Horizontal Line at y=0 (Zero Return)plt.axhline( y=0, color='black', linestyle='--', linewidth=0.8, alpha=0.7, label='Zero Return'# Adding a label is good practice)# 2. Vertical Lines to separate Assets/Boxplotsfor i inrange(9): # 10 - 1 plt.axvline( x=i +0.5, # Place the line exactly between asset i and asset i+1 color='gray', linestyle='-', linewidth=0.5, alpha=0.5 )plt.title("Violin + Boxplot of Asset Returns")plt.xticks(rotation=45)plt.tight_layout()plt.show()
Show code
avgstd = np.mean(desc_stats["Std"])df2 = df.drop("SPX Index", axis=1)plt.figure(figsize=(14, 6))for company in returns.columns: plt.plot((np.cumsum(returns[company])))plt.legend(labels=returns.columns,loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=1,frameon=False)plt.title("Cumulative Performance of discrete returns", fontsize=16)plt.xlabel("Date", fontsize=16)plt.ylabel("Cumulative returns", fontsize=16)plt.grid(True, linestyle='--', alpha=0.6)plt.xticks(fontsize=14) # shrink labelsplt.yticks(fontsize=14) # shrink labelsplt.tight_layout() # makes space for title/axesplt.subplots_adjust(right=0.8) # extra: leaves space for legendplt.show()low =1mid =2high =3#| label: cumulative_performanceplt.figure(figsize=(14, 6))for company in returns.columns: plt.plot(returns[company])plt.legend(labels=returns.columns,loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=1,frameon=False)plt.title("Daily discrete returns of companies last year", fontsize=16)plt.xlabel("Date", fontsize=16)# plt.axhline(low*avgstd, color='red', linestyle='--', label='Market Beta = 1')# plt.axhline(mid*avgstd, color='red', linestyle='--', label='Market Beta = 1')# plt.axhline(high*avgstd, color='red', linestyle='--', label='Market Beta = 1')# plt.axhline(-low*avgstd, color='red', linestyle='--', label='Market Beta = 1')# plt.axhline(-mid*avgstd, color='red', linestyle='--', label='Market Beta = 1')# plt.axhline(-high*avgstd, color='red', linestyle='--', label='Market Beta = 1')plt.ylabel("Discrete returns", fontsize=16)plt.grid(True, linestyle='--', alpha=0.6)plt.xticks(fontsize=14) # shrink labelsplt.yticks(fontsize=14) # shrink labelsplt.tight_layout() # makes space for title/axesplt.subplots_adjust(right=0.8) # extra: leaves space for legendplt.show()plt.figure(figsize=(14, 6))for company in df2.columns: plt.plot(df2[company])plt.legend(labels=df2.columns,loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=1,frameon=False)plt.title("Cumulative Performance of discrete returns", fontsize=16)plt.xlabel("Date", fontsize=16)plt.ylabel("Cumulative returns", fontsize=16)plt.grid(True, linestyle='--', alpha=0.6)plt.xticks(fontsize=14) # shrink labelsplt.yticks(fontsize=14) # shrink labelsplt.tight_layout() # makes space for title/axesplt.subplots_adjust(right=0.8) # extra: leaves space for legendplt.show()print("Day with the lowest returns")returns.idxmin(axis=0).sort_values()
Day with the lowest returns
VWS DC Equity 2024-11-06
AMRC US Equity 2025-02-28
CVX US Equity 2025-04-03
SHEL LN Equity 2025-04-04
TTE FP Equity 2025-04-04
ENI IM Equity 2025-04-07
XOM US Equity 2025-04-10
FSLR US Equity 2025-06-17
EQNR NO Equity 2025-06-24
NEE US Equity 2025-07-23
dtype: datetime64[ns]
Show code
cum_perf = (1+ returns).cumprod() -1+100plt.figure(figsize=(14, 6))for company in cum_perf.columns: plt.plot(cum_perf.index, cum_perf[company], label=company)plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=1, frameon=False)plt.title("Cumulative Performance of Stocks", fontsize=16)plt.xlabel("Date", fontsize=16)plt.ylabel("Cumulative Return", fontsize=16)plt.grid(True, linestyle='--', alpha=0.6)plt.xticks(fontsize=14)plt.yticks(fontsize=14)plt.tight_layout()plt.subplots_adjust(right=0.8)plt.show()
Show code
sorted_beta = beta_df.sort_values('Beta', ascending=False)mycolors = ['red'if v <0else'steelblue'for v in sorted_beta['Beta']]ax = sorted_beta.plot(kind='bar', color=mycolors, legend=False, figsize=(9, 6))plt.axhline(1, color='red', linestyle='--', label='Market Beta = 1')plt.title('Market Beta per Company')plt.ylabel('Beta')plt.legend()plt.tight_layout()plt.show()
plt.figure(figsize=(10, 8))sns.heatmap(returns.corr(method ='spearman') - returns.corr(method ='pearson'), annot=True, cmap='coolwarm', fmt=".2f", annot_kws={"size": 10})plt.title("Difference in correlation Matrix of Returns")
Text(0.5, 1.0, 'Difference in correlation Matrix of Returns')
Show code
plt.figure(figsize=(10, 8))cmap = sns.diverging_palette(220, 20, as_cmap=True) # alternative: sns.color_palette("vlag", as_cmap=True)sns.heatmap( full_returns.corr(), annot=True, fmt=".2f", cmap="coolwarm", vmin=-1, vmax=1, # force scale from -1 to 1 center=0, # center the diverging palette at 0 annot_kws={"size": 9}, square=True, linewidths=0.0, cbar_kws={"shrink": 0.8, "label": "Correlation"})plt.title("Correlation Matrix of Returns")plt.tight_layout()plt.show()
Show code
import matplotlib.pyplot as plt# Assuming 'returns' DataFrame is already defined, and 'window' is set to 30window =30rolling_mean = returns.rolling(window=window).mean()rolling_vol = returns.rolling(window=window).std()# --- Chart 1: Rolling Mean ---plt.figure(figsize=(12, 6))for col in returns.columns:# Plotting each asset's rolling mean plt.plot(rolling_mean.index, rolling_mean[col], linewidth=1.5, label=col)plt.title(f'{window}-Day Rolling Mean of Returns', fontsize=14)plt.xlabel('Date')plt.ylabel('Rolling Mean Return')plt.grid(True, linestyle='--', alpha=0.6)# Legend for Chart 1plt.legend( title='Asset', loc='center left', bbox_to_anchor=(1.02, 0.5), # Moves the legend outside the plot area frameon=False# Ensure a background for the legend for clarity)plt.tight_layout() # Adjust layout to make room for the legendplt.show()# --- Chart 2: Rolling Volatility ---plt.figure(figsize=(12, 6))for col in returns.columns:# Plotting each asset's rolling volatility plt.plot(rolling_vol.index, rolling_vol[col], linewidth=1.5, label=col)plt.title(f'{window}-Day Rolling Volatility (Standard Deviation)', fontsize=14)plt.xlabel('Date')plt.ylabel('Rolling Volatility (Std Dev)')plt.grid(True, linestyle='--', alpha=0.6)# Legend for Chart 2 (identical placement)plt.legend( title='Asset', loc='center left', bbox_to_anchor=(1.02, 0.5), frameon=False)plt.tight_layout()plt.show()
#! melted data / panel for lmplotg = sns.lmplot( data=df_panel, x='PE', y='MKT', hue='Company', # color = company scatter=True, # geom_point aspect=1.5, # aspect ratio height=6, # height ci=None# Set to None to hide the confidence band for cleaner look)# --- 3. Apply Log Scales and Final Formatting ---# g.ax.set_xscale('log')# g.ax.set_yscale('log')# Re-label the axes to reflect the log scaleg.set_axis_labels("PE Ratio (Log Scale)", "Market Cap (Log Scale)")g.fig.suptitle("PE Ratio vs. Market Cap: Separate Trend Line Per Company", y=1.02)plt.grid(True, which="both", ls="--", alpha=0.6)# Adjust the legend positiong.legend.set_title("Company")g.legend.set_bbox_to_anchor((1.05, 0.5))
Scatter with groups
Show code
def Quick_scatter(Data1, Data2, label1, label2, ylog=False, xlog=False, panelreg =False, groupreg =True):# Collect all data for panel regression all_data1 = [] all_data2 = [] plt.figure(figsize=(12, 6))if groupreg:# ---- classify companies ---- green_status = {"CVX US Equity": "no","FSLR US Equity": "yes","ENI IM Equity": "some","XOM US Equity": "no","SHEL LN Equity": "no","VWS DC Equity": "yes","EQNR NO Equity": "some","TTE FP Equity": "some","AMRC US Equity": "yes","NEE US Equity": "yes" }# If you want: define green = yes only green_companies = [c for c,s in green_status.items() if s =="yes"] non_green_companies = [c for c,s in green_status.items() if s =="no"] some_companies = [c for c,s in green_status.items() if s =="some"] # optional# Also prepare lists for green/non-green group regressions green_x = [] green_y = [] nongreen_x = [] nongreen_y = [] some_x = [] some_y = []# --- Plot scatter + company-level regressions ---for company in company_columns: Data1_series = Data1[company] Data2_series = Data2[company] all_data1.append(Data1_series) all_data2.append(Data2_series)if groupreg:# Add to correct groupif company in green_companies: green_x.append(Data1_series) green_y.append(Data2_series)elif company in non_green_companies: nongreen_x.append(Data1_series) nongreen_y.append(Data2_series)elif company in some_companies: some_x.append(Data1_series) some_y.append(Data2_series)# Company scatter plt.scatter(Data1_series, Data2_series, label=company)# Company individual regression# sns.regplot(# x=Data1_series.values,# y=Data2_series.values,# scatter=False,# ci=None,# line_kws={'alpha': 0.6, 'linewidth': 1}# )# --- Convert group lists ---if groupreg: gx = pd.concat(green_x).values if green_x elseNone gy = pd.concat(green_y).values if green_y elseNone nx = pd.concat(nongreen_x).values if nongreen_x elseNone ny = pd.concat(nongreen_y).values if nongreen_y elseNone sx = pd.concat(some_x).values if some_x elseNone sy = pd.concat(some_y).values if some_y elseNoneif panelreg:# --- Panel-wide regression --- all_x = pd.concat(all_data1).values all_y = pd.concat(all_data2).values sns.regplot( x=all_x, y=all_y, scatter=False, ci=None, label='Panel-Wide Regression', line_kws={'color': 'blue', 'linewidth': 4, 'alpha': 0.2} )if groupreg:# --- Group Regressions ---if gx isnotNone: sns.regplot( x=gx, y=gy, scatter=False, ci=None, label='Aligned Companies', line_kws={'color': 'green', 'linewidth': 5, 'alpha': 0.2} )if nx isnotNone: sns.regplot( x=nx, y=ny, scatter=False, ci=None, label='Unprepared Companies', line_kws={'color': 'red', 'linewidth': 5, 'alpha': 0.2} )if sx isnotNone: sns.regplot( x=sx, y=sy, scatter=False, ci=None, label='Developing Companies', line_kws={'color': 'gray', 'linewidth': 5, 'alpha': 0.2} )# --- Formatting ---if xlog: plt.xscale('log')if ylog: plt.yscale('log') plt.title(f'{label1} vs. {label2} (with group regressions)', fontsize=14) plt.xlabel(label1) plt.ylabel(label2) plt.grid(True, ls="--", alpha=0.6) plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), frameon=False) plt.tight_layout()Quick_scatter(PE, MKT, "PE ratio", "Market cap", ylog =False, panelreg =True)Quick_scatter(EPS, MKT, "Earnings per share", "Market cap", ylog =False, panelreg =True)Quick_scatter(EPS, PE, "Earnings per share", "PE ratio", panelreg =True)Quick_scatter(ROE, ROA, "Returns on Equity", "Returns on Assets", panelreg =True)# Quick_scatter_1reg(ESG, PE, "ESG score", "PE ratio")# Quick_scatter_1reg(ESG, EPS, "ESG score", "Earnings per share")# Quick_scatter_1reg(ESG, ROE, "ESG score", "Returns on Equity")# Quick_scatter_1reg(ESG, ROA, "ESG score", "Returns on Assets")
without panel regressions
Show code
Quick_scatter(PE, MKT, "PE ratio", "Market cap")Quick_scatter(EPS, MKT, "Earnings per share", "Market cap")Quick_scatter(EPS, PE, "Earnings per share", "PE ratio")Quick_scatter(ROE, ROA, "Returns on Equity", "Returns on Assets")
Show code
def basic_plot(data, NAME): fig, ax = plt.subplots(figsize=(12, 6))# Plot all columns (companies) against the index (quarters)# This is much simpler than the bar chart, as Matplotlib handles the# X-axis scaling automatically for line plots on an index. data.plot(kind='line', ax=ax)# --- 3. CUSTOMIZING THE CHART ---# Add titles and labels ax.set_xlabel("Quarter", fontsize=12) ax.set_ylabel(NAME.upper(), fontsize=12) ax.set_title(f"{NAME.capitalize()} Trend of 10 Companies Across 4 Quarters", fontsize=14, fontweight='bold' )# Customize the legend: place it outside the plot area ax.legend( title="Companies", bbox_to_anchor=(1.05, 0.5), loc='center left', borderaxespad=0, frameon=False )# Ensure only the quarter labels are shown on the X-axis ax.set_xticks(range(4)) ax.set_xticklabels(quarters)# Add a grid for better readability ax.grid(axis='both', linestyle='-', alpha=0.6)# Display the plot plt.tight_layout() plt.show()basic_plot(PE, "P/E ratio")basic_plot(MKT, "market capitalisation")basic_plot(EPS, "Earnings per share")# basic_plot(ROA, "return on assets")# basic_plot(ROE, "return on equity")# basic_plot(ESG, "Overall esg score")
Show code
def clustered_bar_chart(df, name): quarters = ["Q4 2024", "Q1 2025", "Q2 2025", "Q3 2025"] plot_df = df.transpose().iloc[-10:] plot_df.columns = quarters companies = plot_df.index.tolist() x = np.arange(len(quarters)) # 4 quarter positions width =0.09# small width because 10 companies inside each cluster plt.figure(figsize=(14, 7))for i, company inenumerate(companies): plt.bar( x + i*width, # shift inside each quarter cluster plot_df.loc[company, quarters], # values for that company width, label=company, # show legend only once )# Fix x-axis tick positions (center the cluster labels) plt.xticks(x + width*len(companies)/2, quarters) plt.xlabel("Quarter") plt.ylabel("Value") plt.title(f"Clustered Bar Chart of {name} by Quarter") plt.legend(title="Company", loc='center left',bbox_to_anchor=(1.02, 0.5), ncol=1, frameon=False) plt.tight_layout() plt.show()clustered_bar_chart(MKT, "Market Capitalisation")clustered_bar_chart(EPS, "Earnings Per Share")clustered_bar_chart(PE, "P/E Ratio")
Show code
def stacked_bar_chart(df, name): quarters = ["Q4 2024", "Q1 2025", "Q2 2025", "Q3 2025"] plot_df = df.transpose().iloc[-10:] plot_df.columns = quarters companies = plot_df.index.tolist()# Convert all values to float plot_df = plot_df.apply(pd.to_numeric, errors="coerce") x = np.arange(len(quarters)) plt.figure(figsize=(14,7)) bottom = np.zeros(len(quarters), dtype=float)for company in companies: values = plot_df.loc[company, quarters].values.astype(float) plt.bar( x, values, bottom=bottom, label=company ) bottom += values # numeric safe plt.xticks(x, quarters) plt.xlabel("Quarter") plt.ylabel(f"Total Value of {name} (Stacked)") plt.title(f"Stacked Bar Chart of {name} (per Quarter)") plt.legend(title="Company", loc='center left', bbox_to_anchor=(1.02, 0.5), frameon=False) plt.tight_layout() plt.show()stacked_bar_chart(MKT, "Market Capitalisation")stacked_bar_chart(PE, "P/E Ratio")stacked_bar_chart(EPS, "Earnings Per Share")
Show code
def percentage_stacked_bar_chart(df, name): quarters = ["Q4 2024", "Q1 2025", "Q2 2025", "Q3 2025"] plot_df = df.transpose().iloc[-10:] plot_df.columns = quarters companies = plot_df.index.tolist()# Convert to numeric plot_df = plot_df.apply(pd.to_numeric, errors="coerce")# Normalize so each column sums to 100 df_pct = plot_df[quarters].div(plot_df[quarters].sum(axis=0), axis=1) *100 x = np.arange(len(quarters)) plt.figure(figsize=(14,7)) bottom = np.zeros(len(quarters))for company in companies: values = df_pct.loc[company, quarters].values.astype(float) plt.bar( x, values, bottom=bottom, label=company ) bottom += values plt.xticks(x, quarters) plt.xlabel("Quarter") plt.ylabel("Percentage Share (%)") plt.title(f"Percentage Stacked Bar Chart of {name}") plt.legend(title="Company", loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=1, frameon=False) plt.ylim(0, 100) plt.tight_layout() plt.show()percentage_stacked_bar_chart(MKT, "Market Capitalisation")percentage_stacked_bar_chart(EPS, "Earnings Per Share")percentage_stacked_bar_chart(PE, "P/E Ratio")